home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Libris Britannia 4
/
science library(b).zip
/
science library(b)
/
DDJMAG
/
DDJ9310.ZIP
/
UNDOC.ZIP
/
TOPICDMP.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-08-04
|
14KB
|
374 lines
/* TOPICDMP.C -- Dumps topic file from a Windows .HLP or .MVB file.
Pete Davis, August 1993
With some modifications by Andrew Schulman, September 1993
From Dr. Dobb's Journal, October 1993 */
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <ctype.h>
#include <limits.h>
#pragma pack(1) /* Make sure we get byte alignment */
#include "whstruct.h"
#include "topicdmp.h"
/* Global state shared by the routines in this file. */
HELPHEADER HelpHeader; /* Header for Help file; filled in by DumpFile. */
WHIFSBTREEHEADER WHIFSHeader; /* WHIFS Header record; filled in by WHIFSGetFirstLeaf */
int WHIFSLeafOne = -1; /* First WHIFS Leaf Node; -1 until WHIFSGetFirstLeaf sets it */
long FirstPageLoc; /* Offset just past the WHIFS header; used by macros for b-trees */
char *PhrasesPtr; /* Phrase table set by PhrasesLoad, read by TopicStringPrint */
int Compressed; /* Is there compression? Set by SysLoad from the |SYSTEM flags */
/* Print a message and return from the enclosing void function.
   Wrapped in do/while(0) so it behaves as a single statement. */
#define MSG(s) do { puts(s); return; } while (0)
/* Print a message and terminate the program with exit status 1. */
#define FAIL(s) do { puts(s); exit(1); } while (0)
/* Read a NUL-terminated string from stream f into buffer s.  The
   terminator is consumed from the stream and stored once; reading also
   stops at end of file, in which case s still ends up NUL-terminated.
   No length limit is applied: s must be large enough for the string. */
#define GET_STRING(f, s) \
    do { \
        char *gs_p = (char *)(s); \
        int gs_c; \
        while ((gs_c = fgetc(f)) != EOF && gs_c != 0) \
            *gs_p++ = (char) gs_c; \
        *gs_p = 0; \
    } while (0)
/* Evaluates to 1 when bit number `bit` of `map` is set, otherwise 0. */
#define BIT_SET(map, bit) (((map) & (1 << (bit))) ? 1 : 0)
// Finds the first leaf in the WHIFS B-Tree
/* Reads the WHIFS B-Tree header located at HelpHeader.WHIFS into the
   global WHIFSHeader, sets FirstPageLoc to the offset just past that
   header, then walks down from the root page, following the first page
   number stored after each index-page header, until the leaf level is
   reached.  The resulting page number is stored in WHIFSLeafOne.

   HelpFile - open help file; HelpHeader must already be loaded.

   Exits the program with status 1 if a read fails. */
void WHIFSGetFirstLeaf(FILE *HelpFile) {
    int CurrLevel;              /* Current Level in B-Tree */
    BTREEINDEXHEADER CurrNode;  /* Current Node in B-Tree */
    int NextPage;               /* Next Page to go to */

    /* Go to the beginning of WHIFS B-Tree */
    fseek(HelpFile, HelpHeader.WHIFS, SEEK_SET);
    if (fread(&WHIFSHeader, sizeof(WHIFSHeader), 1, HelpFile) != 1) {
        puts("Error reading WHIFS B-Tree header");
        exit(1);
    }
    FirstPageLoc = HelpHeader.WHIFS + sizeof(WHIFSHeader);

    /* With a single-level tree the loop below never runs and the root
       page itself is the first leaf, so start from the root page. */
    NextPage = WHIFSHeader.RootPage;
    GotoWHIFSPage(WHIFSHeader.RootPage); // macro in WHSTRUCT.H

    /* Find First Leaf */
    for (CurrLevel = 1; CurrLevel < WHIFSHeader.NLevels; CurrLevel++) {
        /* The page number of the first child follows the index header.
           NOTE(review): sizeof(int) bytes are read here, which assumes
           the on-disk page number has the width of int -- confirm when
           building with a compiler whose int is not 16 bits. */
        if (fread(&CurrNode, sizeof(CurrNode), 1, HelpFile) != 1 ||
            fread(&NextPage, sizeof(int), 1, HelpFile) != 1) {
            puts("Error reading WHIFS B-Tree index page");
            exit(1);
        }
        GotoWHIFSPage(NextPage);
    }

    /* First Leaf page is here */
    WHIFSLeafOne = NextPage;
}
// Get a WHIFS file by file number; returns offset and filename
/* Walks the chain of WHIFS leaf pages starting at WHIFSLeafOne until the
   page holding directory entry `Number` (0-based) is reached, then reads
   that page's entries in order up to and including the requested one.

   HelpFile - open help file; WHIFSGetFirstLeaf must have been called.
   Number   - 0-based index of the directory entry wanted.
   Offset   - receives the file offset stored with the entry.
   Name     - receives the entry name (at most 18 characters plus NUL;
              longer names are truncated).

   Exits the program with status 1 if a read fails. */
void GetFile(FILE *HelpFile, DWORD Number, long *Offset, char *Name) {
    BTREENODEHEADER CurrentNode;
    DWORD CurrPage, counter = 0;
    char TempFile[19];
    unsigned Len;
    int ch;

    TempFile[0] = 0;

    /* Skip pages we don't need.  After each page, `counter` is the
       number of entries seen so far, so the page just read holds
       entries counter-NEntries .. counter-1.  Keep going while the
       wanted entry is not among them (counter <= Number). */
    CurrentNode.NextPage = WHIFSLeafOne;
    do {
        CurrPage = CurrentNode.NextPage;
        GotoWHIFSPage(CurrPage);
        if (fread(&CurrentNode, sizeof(CurrentNode), 1, HelpFile) != 1) {
            puts("Error reading WHIFS leaf page");
            exit(1);
        }
        counter += CurrentNode.NEntries;
    } while (counter <= Number);

    /* Read entries from the start of this page through entry Number. */
    for (counter -= CurrentNode.NEntries; counter <= Number; counter++) {
        /* Bounded read of the NUL-terminated name; excess characters
           are consumed from the file but not stored. */
        Len = 0;
        while ((ch = fgetc(HelpFile)) != EOF && ch != 0) {
            if (Len < sizeof(TempFile) - 1)
                TempFile[Len++] = (char) ch;
        }
        TempFile[Len] = 0;
        /* NOTE(review): sizeof(long) bytes are read for the offset;
           confirm that matches the on-disk width on the target. */
        if (ch == EOF || fread(Offset, sizeof(long), 1, HelpFile) != 1) {
            puts("Error reading WHIFS directory entry");
            exit(1);
        }
    }
    strcpy(Name, TempFile);
}
// Get SysHeader to see if compression used on help file
/* Seeks to FileStart, reads the internal file header followed by the
   |SYSTEM header, and sets the global Compressed to 1 when either
   compression flag is present in SysHeader.Flags, otherwise to 0.
   Terminates the program (via FAIL) unless SysHeader.Revision is 21. */
void SysLoad(FILE *HelpFile, long FileStart) {
    FILEHEADER FileHdr;
    SYSTEMHEADER SysHeader;
    int UsesCompression = 0;

    fseek(HelpFile, FileStart, SEEK_SET);
    fread(&FileHdr, sizeof(FileHdr), 1, HelpFile);
    fread(&SysHeader, sizeof(SysHeader), 1, HelpFile);

    if (SysHeader.Revision != 21) {
        FAIL("Sorry, TOPICDMP only works with Windows 3.1 help files");
    }

    if (SysHeader.Flags & COMPRESSION_310) {
        UsesCompression = 1;
    } else if (SysHeader.Flags & COMPRESSION_UNKN) {
        UsesCompression = 1;
    }
    Compressed = UsesCompression;
}
// Decides how many bytes to read, depending on number of bits set
/* Returns 8 plus the number of 1-bits among the low eight bits of
   BitMap, i.e. a value between 8 and 16. */
int BytesToRead(BYTE BitMap) {
    unsigned bits = (unsigned) BitMap & 0xFFu;
    int total = 8;

    while (bits != 0) {
        total += (int) (bits & 1u);
        bits >>= 1;
    }
    return total;
}
// Decompresses the data using Microsoft's LZ77 derivative.
long Decompress(FILE *HelpFile, long CompSize, char *Buffer) {
long InBytes = 0; /* How many bytes read in */
WORD OutBytes = 0; /* How many bytes written out */
BYTE BitMap, Set[16]; /* Bitmap and bytes associated with it */
long NumToRead; /* Number of bytes to read for next group */
int counter, Index; /* Going through next 8-16 codes or chars */
int Length, Distance; /* Code length and distance back in 'window' */
char *CurrPos; /* Where we are at any given moment */
char *CodePtr; /* Pointer to back-up in LZ77 'window' */
CurrPos = Buffer;
while (InBytes < CompSize) {
BitMap = (BYTE) fgetc(HelpFile);
NumToRead = BytesToRead(BitMap);
if ((CompSize - InBytes) < NumToRead)
NumToRead = CompSize - InBytes; // only read what we have left
fread(Set, 1, (int) NumToRead, HelpFile);
InBytes += NumToRead + 1;
/* Go through and decode data */
for (counter = 0, Index = 0; counter < 8; counter++) {
/* It's a code, so decode it and copy the data */
if (BIT_SET(BitMap, counter)) {
Length = ((Set[Index+1] & 0xF0) >> 4) + 3;
Distance = (256 * (Set[Index+1] & 0x0F)) + Set[Index] + 1;
CodePtr = CurrPos - Distance; // ptr into decompress window
while (Length)
{ *CurrPos++ = *CodePtr++; OutBytes++; Length--; }
Index += 2; /* codes are 2 bytes */
}
else
{ *CurrPos++ = Set[Index++]; OutBytes++; }
}
}
return OutBytes;
}
// Prints a Phrase from the Phrase table
/* Phrases points at a table of 2-byte offsets (stored in the machine's
   native byte order, as read from the file) followed by the phrase
   text.  Offsets are measured from the start of the table; phrase N
   occupies bytes Offsets[N] .. Offsets[N+1]-1 and is written to stdout.

   Phrases   - start of the offset table; nothing is printed when NULL.
   PhraseNum - 0-based phrase index; negative values print nothing.
               The index is not checked against the table size because
               the number of phrases is not available here.

   Each offset is fetched as an unsigned 16-bit value with memcpy, so
   the lookup does not depend on sizeof(int). */
void PrintPhrase(char *Phrases, int PhraseNum) {
    unsigned short Start, End;
    char *p, *Stop;

    if (Phrases == NULL || PhraseNum < 0)
        return;

    memcpy(&Start, Phrases + sizeof(Start) * (unsigned) PhraseNum,
           sizeof(Start));
    memcpy(&End, Phrases + sizeof(End) * ((unsigned) PhraseNum + 1u),
           sizeof(End));

    Stop = Phrases + End;
    for (p = Phrases + Start; p < Stop; p++)
        putchar(*p);
}
// Build up a table of phrases
/* Loads the |Phrases internal file starting at FileStart into a heap
   buffer and stores its address in the global PhrasesPtr.  The buffer
   begins with NumPhrases+1 two-byte offsets, followed by the phrase
   text (decompressed first when the global Compressed is set).

   HelpFile  - open help file.
   FileStart - offset of the |Phrases internal file.

   On allocation failure a message is printed and PhrasesPtr is left
   unchanged.  The buffer is never freed by this function. */
void PhrasesLoad(FILE *HelpFile, long FileStart) {
    FILEHEADER FileHdr;
    PHRASEHDR PhraseHdr;
    char *Table;        /* offsets followed by phrase text */
    char *Phrases;      /* where the phrase text starts inside Table */
    long DeCompSize;

    /* Go to the phrases file and get the headers */
    fseek(HelpFile, FileStart, SEEK_SET);
    fread(&FileHdr, sizeof(FileHdr), 1, HelpFile);
    fread(&PhraseHdr, sizeof(PhraseHdr), 1, HelpFile);

    /* Allocate space and decompress if it's compressed, else read in. */
    if (Compressed) {
        if ((Table = malloc((unsigned) (PhraseHdr.PhrasesSize +
                (PhraseHdr.NumPhrases + 1) * 2))) == NULL)
            MSG("No room to decompress |Phrases");
        /* Phrase text goes right after the offsets that were read.  The
           position is computed in bytes (2 per offset) so it does not
           depend on sizeof(int). */
        Phrases = Table + 2 * fread(Table, 2, PhraseHdr.NumPhrases + 1,
                                    HelpFile);
        DeCompSize = Decompress(HelpFile, (long)FileHdr.FileSize -
            (sizeof(PhraseHdr) + 2 * (PhraseHdr.NumPhrases+1)), Phrases);
        /* NOTE(review): a size mismatch only emits a newline; this looks
           like the remains of a diagnostic -- confirm intent. */
        if (DeCompSize != PhraseHdr.PhrasesSize) {
            printf("\n");
        }
    }
    else {
        /* FileSize - 4 bytes are read below, so allocate exactly that
           much (sizing by sizeof(PhraseHdr) could under-allocate). */
        if (!(Table = malloc((unsigned) (FileHdr.FileSize - 4))))
            MSG("No room to decompress |Phrases");
        /* Backup 4 bytes for uncompressed Phrases (no PhrasesSize) */
        fseek(HelpFile, -4, SEEK_CUR);
        fread(Table, (unsigned) (FileHdr.FileSize - 4), 1, HelpFile);
    }
    PhrasesPtr = Table;
}
/* Because the topic file is broken into 4k blocks, we'll have to handle
all the reads. The idea is to filter out the TOPICBLOCKHEADERs and
do any decompression that needs doing.

Dest     - receives NumBytes bytes of topic data.
NumBytes - number of bytes wanted, or -1 to release the block buffer.
HelpFile - open help file; on the first call it must be positioned at
           the first TOPICBLOCKHEADER.

Returns NumBytes, or 0 for the NumBytes == -1 call.  All read state is
kept in static variables, so the function serves one sequential stream
of reads.  Allocation failure terminates the program via FAIL. */
long TopicRead(BYTE *Dest, long NumBytes, FILE *HelpFile) {
static long CurrBlockLoc = 0; /* Where we are in the block */
static BYTE *DCmpBlock = NULL; /* Block of uncompressed data */
static long DecompSize; /* Size of block after decomp */
static long TopicStart, BlkNum; /* File position at first call; blocks loaded so far */
long BytesLeft; /* # Bytes left to return */
TOPICBLOCKHEADER BlockHeader;
TOPICLINK *TempLink;
long EndOffset;
/* If NumBytes == -1, then we're done and need to free memory.
NOTE(review): DCmpBlock is not reset to NULL here, so a later call
would skip the allocation below and use the freed pointer. */
if (NumBytes == -1) { free(DCmpBlock); return 0; }
/* First call: allocate the block buffer (four times larger when the
data has to be decompressed into it). */
if (!DCmpBlock) {
if (Compressed) {
if (! (DCmpBlock = malloc((unsigned) (4 * TopicBlockSize))))
FAIL("Not enough memory to decompress |TOPIC file");
TopicStart = ftell(HelpFile);
BlkNum = 0;
}
else if (! (DCmpBlock = malloc((unsigned) TopicBlockSize)))
FAIL("Not enough memory to handle |TOPIC file");
DecompSize = 0; /* Set initial size to 0 */
/* Don't really need first block header, so get it out of the way */
fread(&BlockHeader, sizeof(BlockHeader), 1, HelpFile);
}
BytesLeft = NumBytes;
/* Hand out one byte per iteration, refilling the buffer when it runs dry */
while (BytesLeft) {
/* Buffer exhausted (also true on the first call, when both are 0) */
if (DecompSize == CurrBlockLoc) {
BlkNum++;
if (Compressed) {
DecompSize = Decompress(HelpFile, (long)TopicBlockSize-1,
(char *)DCmpBlock);
/* Align ourselves at next 4k block */
fseek(HelpFile, TopicStart + (4096L * BlkNum), SEEK_SET);
}
else
DecompSize=fread(DCmpBlock,1,(unsigned) TopicBlockSize, HelpFile);
CurrBlockLoc = 0;
/* Read the block header that follows the data just loaded; its
LastTopicLink field is used below to find where that data ends. */
fread(&BlockHeader, sizeof(BlockHeader), 1, HelpFile);
// Get offset of last topic link. (Don't need block #, hence 3FFFh)
EndOffset = BlockHeader.LastTopicLink & 0x3FFF;
/* Offsets count the block header, which is not stored in DCmpBlock */
TempLink = (TOPICLINK*)(DCmpBlock + EndOffset-sizeof(BlockHeader));
/* Actual end of the data (Don't include header) */
EndOffset += (TempLink->BlockSize - sizeof(BlockHeader));
// If end shorter than topic block use it; else topic block full
if (EndOffset > DecompSize) {
/* Adjust DecompSize if crossing 4k boundary */
EndOffset = TempLink->BlockSize-((TempLink->NextBlock) & 0x3FFF);
DecompSize = (BlockHeader.LastTopicLink & 0x3FFF) + EndOffset;
}
else DecompSize = EndOffset;
} /* If */
*(Dest++) = *(DCmpBlock + (CurrBlockLoc++) );
BytesLeft--;
} /* While (BytesLeft) */
return NumBytes;
}
// Displays a string from a topic link record. Checks for Phrase
// replacement and non-printable chars
/* String - bytes to display (need not be NUL-terminated).
   Length - number of bytes in String.

   When a phrase table is loaded (PhrasesPtr is not NULL), a byte with
   value 1..9 starts a two-byte phrase reference: the combined number
   256 * (byte1 - 1) + byte2 selects phrase number/2, and an odd number
   means a space follows the phrase.  A reference whose second byte
   would lie beyond Length ends the output.  Every other byte is
   printed as-is when printable and as a space otherwise. */
void TopicStringPrint(char *String, long Length) {
    int CurChar, Hi, Lo, PhraseNum;
    long counter;

    for (counter = 0; counter < Length; counter++) {
        /* Go through unsigned char: <ctype.h> functions must not be
           given negative values other than EOF. */
        CurChar = (unsigned char) String[counter];

        /* Check for Phrase replacement! */
        if (PhrasesPtr != NULL && (CurChar > 0) && (CurChar < 10)) {
            if (counter + 1 >= Length)
                break;      /* second byte of the reference is missing */
            Hi = CurChar;
            counter++;
            Lo = (unsigned char) String[counter];
            PhraseNum = (256 * (Hi - 1) + Lo);
            /* If there's a remainder, we have a space after the phrase */
            PrintPhrase(PhrasesPtr, PhraseNum / 2);
            if (PhraseNum % 2) putchar(' ');
        }
        else if (isprint(CurChar)) putchar(CurChar);
        else putchar(' '); // could do newline for 0x00 0x00
    }
}
// Dump |TOPIC file, doing decompression and phrase substitution
void TopicDump(FILE *HelpFile, long FileStart) {
FILEHEADER FileHdr;
TOPICHEADER *TopicHdr;
TOPICLINK TopicLink;
/* Go to the TOPIC file and get the headers */
fseek(HelpFile, FileStart, SEEK_SET);
fread(&FileHdr, sizeof(FileHdr), 1, HelpFile);
do {
TopicRead((BYTE *) &TopicLink, sizeof(TopicLink) - 4, HelpFile);
if (Compressed)
TopicLink.DataLen2 = TopicLink.BlockSize - TopicLink.DataLen1;
TopicLink.LinkData1=(BYTE *) malloc((unsigned)(TopicLink.DataLen1-21));
if(!TopicLink.LinkData1)
MSG("Error allocating TopicLink.LinkData1");
TopicRead(TopicLink.LinkData1, TopicLink.DataLen1 - 21, HelpFile);
if (TopicLink.DataLen2 > 0) {
TopicLink.LinkData2=(BYTE*)malloc((unsigned)(TopicLink.DataLen2+1));
if(!TopicLink.LinkData2)
MSG("Error allocating TopicLink.LinkData2");
TopicRead(TopicLink.LinkData2, TopicLink.DataLen2, HelpFile);
}
/* Display a Topic Header record */
if (TopicLink.RecordType == TL_TOPICHDR) {
TopicHdr = (TOPICHEADER *)TopicLink.LinkData1;
printf("================ Topic Block Data ====================\n");
printf("Topic#: %ld - ", TopicHdr->TopicNum);
if (TopicLink.DataLen2 > 0)
TopicStringPrint(TopicLink.LinkData2, (long) TopicLink.DataLen2);
else printf("\n");
}
/* Show a 'text' type record. */
else if (TopicLink.RecordType == TL_DISPLAY) {
printf("-- Topic Link Data\n");
TopicStringPrint(TopicLink.LinkData2, (long) TopicLink.DataLen2);
}
printf("\n\n");
free(TopicLink.LinkData1);
if (TopicLink.DataLen2 > 0) free(TopicLink.LinkData2);
} while(TopicLink.NextBlock != -1);
}
/* Top-level driver for one help file: reads the help header into the
   global HelpHeader, checks the magic number, then scans every WHIFS
   directory entry.  |SYSTEM is loaded as soon as it is seen; the
   offsets of |Phrases and |TOPIC are remembered and processed after
   the scan, phrases first.  Prints a message and returns if the magic
   number is wrong or no |TOPIC entry exists. */
void DumpFile(FILE *HelpFile) {
    char EntryName[32];
    long EntryOffset;
    long PhrasesAt = 0;
    long TopicAt = 0;
    DWORD idx = 0;

    fread(&HelpHeader, sizeof(HelpHeader), 1, HelpFile);
    if (HelpHeader.MagicNumber != 0x35F3FL)
        MSG("Fatal Error: Not a valid WinHelp file");

    WHIFSGetFirstLeaf(HelpFile);

    while (idx < WHIFSHeader.TotalWHIFSEntries) {
        GetFile(HelpFile, idx, &EntryOffset, EntryName);
        if (strcmp(EntryName, "|SYSTEM") == 0)
            SysLoad(HelpFile, EntryOffset);
        else if (strcmp(EntryName, "|Phrases") == 0)
            PhrasesAt = EntryOffset;
        else if (strcmp(EntryName, "|TOPIC") == 0)
            TopicAt = EntryOffset;
        idx++;
    }

    if (PhrasesAt != 0)
        PhrasesLoad(HelpFile, PhrasesAt);

    if (TopicAt == 0)
        MSG("No Topic file found!");
    TopicDump(HelpFile, TopicAt);
}
/* Entry point.  Expects the help file name as the first argument; the
   name is upper-cased in place and ".HLP" is appended when it contains
   no '.' character.  Returns EXIT_FAILURE on a usage error, an
   over-long name, or when the file cannot be opened; otherwise dumps
   the file and returns EXIT_SUCCESS. */
int main(int argc, char *argv[]) {
    char filename[FILENAME_MAX + 5];   /* room for ".HLP" and the NUL */
    FILE *HelpFile;

    if (argc < 2) {
        printf("Usage: TOPICDMP helpfile[.hlp]\n\n");
        printf(" helpfile - Name of help file (.HLP or .MVB)\n\n");
        return EXIT_FAILURE;
    }

    /* Refuse names that would not fit the buffer instead of letting
       strcpy/strcat write past its end. */
    if (strlen(argv[1]) >= FILENAME_MAX) {
        printf("File name too long: %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (! strchr(strcpy(filename, strupr(argv[1])), '.'))
        strcat(filename, ".HLP");

    if ((HelpFile = fopen(filename, "rb")) == NULL) {
        printf("Can't open %s!", filename);
        return EXIT_FAILURE;
    }

    DumpFile(HelpFile);
    fclose(HelpFile);
    return EXIT_SUCCESS;
}